"""猫眼电影榜单 TOP 100"""

from requests_html import HTMLSession
from collections import namedtuple
import pandas as pd
import re

# Field names for one scraped movie record.  nth_page interpolates them into
# its regex as the named-group names and, for every entry except 'rank', also
# as literal text that must precede the captured value in the page markup.
header = ['rank', 'title', 'star', 'releasetime', 'integer', 'fraction']


def nth_page(sess, n):
    """Fetch page ``n`` of the Maoyan board and extract its movie records.

    Args:
        sess: Object whose ``get(url)`` returns a response exposing the page
            markup as ``.text``.
        n: Page number; page 1 maps to ``offset=0`` and every later page
            advances the offset by 10.

    Returns:
        The result of ``findall`` on the page text: a list of tuples of
        strings, one tuple per match, with the captured groups in the same
        order as the module-level ``header``.
    """
    offset = 10 * (n - 1)
    # VERBOSE lets the pattern span several lines (hence the escaped space in
    # the first line); DOTALL lets each lazy ``.*?`` run across newlines.
    record_re = re.compile(r'''
        <i\ class="board-index.*?>(?P<{0}>.*?)</i>
        .*?{1}="(?P<{1}>.*?)"
        .*?{2}">\s*主演：(?P<{2}>.*?)\s*</p>
        .*?{3}">上映时间：(?P<{3}>.*?)</p>
        .*?{4}">(?P<{4}>.*?)</i>
        .*?{5}">(?P<{5}>.*?)</i>'''.format(*header), flags=re.VERBOSE | re.DOTALL)
    response = sess.get(f'https://maoyan.com/board/4?offset={offset}')
    return record_re.findall(response.text)


# Record type for one scraped movie.  The original script referenced
# ``MovieRecord._fields`` without ever defining ``MovieRecord`` (NameError);
# it is built here from ``header`` so its fields match the regex groups.
MovieRecord = namedtuple('MovieRecord', header)

movie_list = []
max_pages = 10  # number of board pages to request
session = HTMLSession()
for i in range(1, max_pages + 1):
    # Wrap each tuple returned by nth_page in a MovieRecord so that rows
    # carry named fields.
    movie_list.extend(MovieRecord._make(row) for row in nth_page(session, i))
# Bind the table to a name so it remains usable after the script has run,
# instead of building it and discarding the result.
movie_df = pd.DataFrame(movie_list, columns=MovieRecord._fields)
